<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>X2Knowledge - 知识提取器工具</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}">
    <!-- 引入Marked库用于Markdown渲染 -->
    <script src="https://cdn.jsdelivr.net/npm/marked@4.3.0/marked.min.js"></script>
    <!-- 引入highlight.js用于代码高亮 -->
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/highlight.js@11.7.0/styles/github.min.css">
    <script src="https://cdn.jsdelivr.net/npm/highlight.js@11.7.0/highlight.min.js"></script>
    <style>
        .converter-options {
            margin-bottom: 15px;
            display: flex;
            flex-direction: column;
            gap: 5px;
        }
        .converter-option {
            display: flex;
            align-items: center;
            margin-bottom: 5px;
        }
        .converter-option input {
            margin-right: 8px;
        }
        .converter-description {
            margin-left: 25px;
            font-size: 0.9em;
            color: #666;
            margin-top: -3px;
            margin-bottom: 8px;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="language-switch">
            <button class="language-btn active" data-lang="zh">中文</button>
            <button class="language-btn" data-lang="en">English</button>
        </div>
        
        <header>
            <h1 data-i18n="title">X2Knowledge - 知识提取器工具</h1>
            <p data-i18n="subtitle">知识库原生支持：输出带元数据的Markdown，与RAG/LangChain/Agent等框架无缝对接</p>
            <div class="nav-links">
                <a href="/api-docs" class="nav-link" data-i18n="api-docs-link">API文档</a>
            </div>
        </header>

        <main>
            <div class="tab-controls">
                <button class="tab-button" data-tab="text" data-i18n="tab-text">转为文本</button>
                <button class="tab-button active" data-tab="markdown" data-i18n="tab-markdown">转为Markdown</button>
            </div>
            
            <div class="tab-content" id="tab-text">
                <div class="upload-container" id="dropAreaText">
                    <div class="upload-icon">
                        <svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
                            <polyline points="17 8 12 3 7 8"></polyline>
                            <line x1="12" y1="3" x2="12" y2="15"></line>
                        </svg>
                    </div>
                    <p><span data-i18n="drop-files">拖拽文件到此处或</span><label for="fileInputText" class="file-label" data-i18n="choose-file">选择文件</label></p>
                    <input type="file" id="fileInputText" accept=".doc,.docx,.xls,.xlsx,.ppt,.pptx,.pdf,.txt,.md,.xml" hidden>
                    <p class="file-types" data-i18n="supported-types-text">支持的文件类型: .doc, .docx, .xls, .xlsx, .ppt, .pptx, .pdf, .txt, .md, .xml</p>
                    <p class="file-note" data-i18n="doc-note">注意: 处理旧版Word文档(.doc)可能需要更长时间，建议转换为.docx格式后上传</p>
                    <p class="ocr-note" data-i18n="ocr-feature">OCR功能: 自动识别文档中的图片文字</p>
                    <p class="encoding-note" data-i18n="encoding-feature">编码功能: 自动将文档转换为UTF-8编码，解决中文乱码问题</p>
                </div>
            </div>
            
            <div class="tab-content active" id="tab-markdown">
                <div class="upload-container" id="dropAreaMarkdown">
                    <div class="upload-icon">
                        <svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
                            <polyline points="17 8 12 3 7 8"></polyline>
                            <line x1="12" y1="3" x2="12" y2="15"></line>
                        </svg>
                    </div>
                    
                    <div class="converter-options">
                        <div class="converter-option">
                            <input type="radio" id="markitdown-converter" name="converter" value="markitdown" checked>
                            <label for="markitdown-converter" data-i18n="markitdown-converter-label">使用 MarkItDown 转换器 (默认)</label>
                        </div>
                        <p class="converter-description" data-i18n="markitdown-converter-desc">对原生Office格式文件（DOCX, XLSX, PPTX）支持非常好，速度快、准确率高，但对PDF格式文件效果较差</p>
                        
                        <div class="converter-option">
                            <input type="radio" id="docling-converter" name="converter" value="docling">
                            <label for="docling-converter" data-i18n="docling-converter-label">使用 Docling 转换器</label>
                        </div>
                        <p class="converter-description">
                            <span data-i18n="docling-converter-desc">对PDF有优化的表格识别和VLM，准确率大大提升，支持更多输入格式，但需要CUDA环境最佳（当前使用CPU加速）</span>
                            <span data-i18n="docling-warning" style="color: red; font-weight: bold;">且时间较长，不建议在本测试环境使用！</span>
                        </p>
                        
                        <div class="converter-option">
                            <input type="radio" disabled id="marker-converter" name="converter" value="marker">
                            <label for="marker-converter" data-i18n="marker-converter-label">使用 Marker 转换器</label>
                        </div>
                        <p class="converter-description">
                            <span data-i18n="marker-converter-desc">高精度文档转换器，优秀的表格和公式处理能力，支持多种文档格式和图片</span>
                            <span data-i18n="marker-warning" style="color: orange; font-weight: bold;">需要安装PyTorch，性能取决于系统配置</span>
                        </p>
                    </div>
                    
                    <p><span data-i18n="drop-files">拖拽文件到此处或</span><label for="fileInputMarkdown" class="file-label" data-i18n="choose-file">选择文件</label></p>
                    <input type="file" id="fileInputMarkdown" accept=".doc,.docx,.xls,.xlsx,.ppt,.pptx,.pdf,.txt,.md,.mp3,.wav,.csv,.html,.xhtml,.png,.jpg,.jpeg,.tiff,.bmp,.xml" hidden>
                    <p class="file-types" id="supportedFileTypes" data-i18n="supported-types-md">支持的文件类型: .doc, .docx, .xls, .xlsx, .ppt, .pptx, .pdf, .txt, .md, .mp3, .wav, .xml</p>
                    <p class="file-note" data-i18n="markdown-note-structure">注意: 转换为Markdown格式可以更好地保留文档结构</p>
                    <p class="markdown-note" data-i18n="markdown-feature-structure">Markdown功能: 保留文档标题、列表、表格等结构</p>
                    <p class="markdown-note" data-i18n="markdown-feature-audio">新增功能: 支持音频文件(.mp3, .wav)转换为Markdown描述</p>
                </div>
            </div>

            <div class="result-container" id="resultContainer" style="display: none;">
                <div class="result-header">
                    <h2 data-i18n="conversion-result">转换结果</h2>
                    <div class="result-actions">
                        <button id="copyBtn" class="btn" data-i18n="copy-text">复制文本</button>
                        <button id="downloadBtn" class="btn" data-i18n="download-text">下载文件</button>
                        <button id="newConvertBtn" class="btn" data-i18n="new-conversion">新的转换</button>
                        <button id="fullscreenBtn" class="btn fullscreen-btn" style="display: none;"><i class="fullscreen-icon"></i><span data-i18n="fullscreen">全屏预览</span></button>
                    </div>
                </div>
                <div class="result-content">
                    <pre id="resultText"></pre>
                </div>
            </div>

            <div class="loading" id="loadingIndicator" style="display: none;">
                <div class="spinner"></div>
                <p data-i18n="converting">正在转换文件，请稍候...</p>
                <p class="loading-note" data-i18n="doc-loading-note">旧版格式文件(.doc)可能需要较长时间</p>
                <p class="ocr-loading-note" data-i18n="ocr-loading-note">正在进行图片文字识别...</p>
            </div>
        </main>

        <footer>
            <p data-i18n="footer">© 2025 X2Knowledge v0.4.0 - 知识提取器工具 | 使用Flask和JavaScript构建</p>
            <div class="github-links">
                <a href="https://github.com/leonda123/X2Knowledge.git" class="github-link" target="_blank">
                    <svg class="github-icon" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="currentColor">
                        <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
                    </svg>
                    GitHub
                </a>
                <a href="https://gitee.com/leonda/X2Knowledge.git" class="github-link" target="_blank">
                    <svg class="github-icon" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="currentColor">
                        <path d="M8 0a8 8 0 0 0-8 8 8 8 0 0 0 8 8 8 8 0 0 0 8-8 8 8 0 0 0-8-8zm0.24 3.088a4.912 4.912 0 0 1 4.794 5.542H5.726c-.02 1.311 1.425 1.993 2.761 1.993 1.155 0 2.206-.606 2.657-1.44h2.342a5.083 5.083 0 0 1-5.038 3.392 4.919 4.919 0 0 1-4.82-5.585 4.916 4.916 0 0 1 4.612-3.902z"/>
                    </svg>
                    Gitee
                </a>
            </div>
        </footer>
    </div>

    <!-- 全屏预览覆盖层 -->
    <div id="fullscreenOverlay" class="fullscreen-overlay">
        <div class="fullscreen-header">
            <h3 data-i18n="fullscreen-preview">预览内容</h3>
            <button id="closeFullscreenBtn" class="close-fullscreen-btn">×</button>
        </div>
        <div class="fullscreen-content">
            <div id="fullscreenPreview" class="markdown-preview fullscreen-preview"></div>
        </div>
    </div>

    <script src="{{ url_for('static', filename='js/i18n.js') }}"></script>
    <script src="{{ url_for('static', filename='js/main.js') }}"></script>
    <script>
        // 配置marked选项
        marked.setOptions({
            breaks: true,             // 将换行符转换为<br>
            gfm: true,                // 启用GitHub风格的Markdown
            headerIds: true,          // 为标题添加id
            mangle: false,            // 不转义HTML
            tables: true,             // 启用表格支持
            sanitize: false,          // 允许HTML标签
            smartLists: true,         // 使用更智能的列表行为
            smartypants: true,        // 使用更智能的标点符号
            xhtml: false,             // 不使用xhtml闭合标签
            highlight: function(code, lang) {
                // 如果语言被指定且highlight.js支持，使用高亮
                if (lang && hljs.getLanguage(lang)) {
                    try {
                        return hljs.highlight(code, {language: lang}).value;
                    } catch (err) {}
                }
                
                // 使用自动检测语言
                try {
                    return hljs.highlightAuto(code).value;
                } catch (err) {}
                
                // 如果高亮失败，返回原始代码
                return code;
            }
        });
        
        // 创建自定义渲染器，增强表格和图片渲染
        const renderer = new marked.Renderer();
        
        // 自定义表格渲染
        renderer.table = function(header, body) {
            return '<table class="md-table">\n'
                + '<thead>\n'
                + header
                + '</thead>\n'
                + '<tbody>\n'
                + body
                + '</tbody>\n'
                + '</table>\n';
        };
        
        // 自定义图片渲染
        renderer.image = function(href, title, text) {
            return `<img src="${href}" alt="${text}" title="${title || text}" class="md-image">`;
        };
        
        // 应用自定义渲染器
        marked.use({ renderer });
        
        // 全屏预览相关
        document.addEventListener('DOMContentLoaded', function() {
            const fullscreenBtn = document.getElementById('fullscreenBtn');
            const fullscreenOverlay = document.getElementById('fullscreenOverlay');
            const closeFullscreenBtn = document.getElementById('closeFullscreenBtn');
            const fullscreenPreview = document.getElementById('fullscreenPreview');
            const markdownPreview = document.querySelector('.markdown-preview');
            
            // 显示全屏预览
            fullscreenBtn.addEventListener('click', function() {
                // 复制预览内容到全屏预览区域
                fullscreenPreview.innerHTML = markdownPreview.innerHTML;
                
                // 显示全屏覆盖层
                fullscreenOverlay.style.display = 'flex';
                document.body.style.overflow = 'hidden'; // 防止背景滚动
                
                // 为全屏预览中的图片添加点击事件
                const images = fullscreenPreview.querySelectorAll('img.md-image');
                images.forEach(img => {
                    img.addEventListener('click', function() {
                        this.classList.toggle('md-image-fullscreen');
                    });
                });
            });
            
            // 关闭全屏预览
            closeFullscreenBtn.addEventListener('click', function() {
                fullscreenOverlay.style.display = 'none';
                document.body.style.overflow = 'auto'; // 恢复背景滚动
            });
            
            // ESC键关闭全屏预览
            document.addEventListener('keydown', function(e) {
                if (e.key === 'Escape' && fullscreenOverlay.style.display === 'flex') {
                    fullscreenOverlay.style.display = 'none';
                    document.body.style.overflow = 'auto';
                }
            });
        });
    </script>
</body>
</html> 